# -------------------------------------------------------------------
#       -*- coding: utf-8 -*-
#   @Project    :   spider_biqushen
#   @File       :   bishuge_qun.py
#   @Author     :   WANGYU
#   @Time       :   2021-09-02 08:55:00
#   @Software   :   PyCharm
#   @Desc       :   Download the chapters of book 26585 from www.biqushen.cc and save them as .txt files under ./files
# -------------------------------------------------------------------


import os
import re

import requests
from bs4 import BeautifulSoup


# The chapter text is taken from the first non-blank line after the BookText tag.
_BOOK_TEXT_RE = re.compile(r'<div id="BookText">\s*(.+)')
_TITLE_RE = re.compile(r'<title>(.+?)</title>')
# "Chapter not finished, click next page" paragraph appended to paginated pages.
_NEXT_PAGE_RE = re.compile(r'<p class="to_nextpage">.*?</p\s*>')
_DIV_TAG_RE = re.compile(r'</?div[^>]*>')
# Characters that cannot appear in a file name on Windows and/or POSIX.
_UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|]')


def _fetch_html(url):
    """Download *url* and return its body decoded as UTF-8."""
    # No raise_for_status(): a missing sub-page is the normal end-of-chapter
    # signal and is detected by the caller from the page content.
    response = requests.get(url, timeout=10)
    response.encoding = 'utf-8'
    return response.text


def _extract_title(html):
    """Return the page <title> as a safe file-name stem, or None if absent."""
    match = _TITLE_RE.search(html)
    if match is None:
        return None
    # Remove the tags by capture group, not str.strip(): strip() treats its
    # argument as a set of characters and would also eat title text.
    title = _UNSAFE_FILENAME_RE.sub('_', match.group(1).strip())
    return title or None


def _extract_text(html):
    """Return the plain chapter text found in *html*, or None if absent."""
    match = _BOOK_TEXT_RE.search(html)
    if match is None:
        return None
    text = _NEXT_PAGE_RE.sub('', match.group(1))
    text = _DIV_TAG_RE.sub('', text)
    # Paragraphs are separated by <br/> tags followed by a four-space indent.
    text = text.replace('<br/>', '').replace('    ', '\n')
    return text.strip()


def get_book(*, base_url='https://www.biqushen.cc/book/26585/',
             chapter_count=71, max_parts=10, out_dir='./files', fetch=None):
    """Download a book chapter by chapter and save each page as a text file.

    For every chapter index ``i`` in ``range(chapter_count)`` the page
    ``<base_url><i>.html`` is saved as ``<out_dir>/<title>.txt``. Its
    continuation pages ``<base_url><i>_<n>.html`` are saved as
    ``<out_dir>/<title>第<n>部分.txt`` until a page without ``<br/>`` is met
    or ``max_parts`` pages have been read.

    Args:
        base_url: URL prefix of the book, ending with ``/``.
        chapter_count: Number of chapter indices to request, starting at 0.
        max_parts: Upper bound on continuation pages tried per chapter.
        out_dir: Directory that receives the files; created if missing.
        fetch: Optional callable ``url -> html str``. Defaults to an HTTP GET
            through ``requests`` with a 10 second timeout.

    Raises:
        requests.RequestException: From the default fetcher on network errors.
        OSError: If the output directory or a file cannot be written.
    """
    if fetch is None:
        fetch = _fetch_html
    os.makedirs(out_dir, exist_ok=True)

    for chapter in range(chapter_count):
        url = '{}{}.html'.format(base_url, chapter)
        html = fetch(url)
        title = _extract_title(html)
        text = _extract_text(html)
        if title is None or text is None:
            # An unexpected page should not abort the remaining chapters.
            print('skip ' + url + ': title or chapter text not found')
            continue
        with open(os.path.join(out_dir, title + '.txt'), 'w',
                  encoding='utf-8') as chapter_file:
            chapter_file.write(text)

        # Crawl the continuation pages of this chapter; a chapter is assumed
        # to have at most `max_parts` of them. The counter restarts for every
        # chapter so that no chapter inherits the previous one's position.
        # NOTE(review): the site's next-page link looks like "0_1.html";
        # confirm whether continuation pages start at index 0 or 1.
        for part in range(max_parts):
            part_html = fetch('{}{}_{}.html'.format(base_url, chapter, part))
            if '<br/>' not in part_html:
                break
            part_text = _extract_text(part_html)
            if part_text is None:
                break
            part_name = title + '第' + str(part) + '部分.txt'
            with open(os.path.join(out_dir, part_name), 'w',
                      encoding='utf-8') as part_file:
                part_file.write(part_text)
            print(title + str(part) + '.down')




# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    get_book()